Given a set of perturbed CRISPR candidates at enhancers, generate images to visualize perturbation effects on accessibility and binding.
#Standard packages
library(rtracklayer) ; library(GenomicRanges); library(magrittr) ; library(Biostrings)
library(ggplot2) ; library(reshape2); library(plyranges); library(Rsamtools); library(parallel)
library(dplyr); library(data.table); library(patchwork); library(readr); library(testit)
#KNITR Options
setwd("/n/projects/mw2098/publications/2024_weilert_acc/code/2_analysis/")
figure_filepath<-"figures/14_plot_crispr"
options(knitr.figure_dir=figure_filepath, java.parameters = "- Xmx6g")
#Lab sources
source("scripts/r/granges_common.r")
source("scripts/r/metapeak_common.r")
source("scripts/r/knitr_common.r")
source("scripts/r/caching.r")
source("scripts/r/metapeak_functions.r")
#Specific sources
library(BSgenome.Mmusculus.UCSC.mm10)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(ggseqlogo)
library(rhdf5)
library(DESeq2)
source("scripts/r/motif_functions.r")
#Pre-existing variables
threads <- 5
bsgenome<-BSgenome.Mmusculus.UCSC.mm10
txdb<-TxDb.Mmusculus.UCSC.mm10.knownGene
#new directories
dir.create(paste0(figure_filepath, '/pisa'), showWarnings = F, recursive = T)
dir.create(paste0(figure_filepath, '/enhancers'), showWarnings = F, recursive = T)
#model information
tasks<-c('oct4', 'sox2', 'nanog', 'klf4', 'zic3')
motif_to_task.list<-list('Oct4-Sox2' = 'oct4', 'Sox2' = 'sox2', 'Klf4' = 'klf4', 'Zic3' = 'zic3', 'Nanog' = 'nanog')
motif_to_task.df<-data.frame(task = motif_to_task.list %>% unlist, motif = names(motif_to_task.list))
output_length <- 1000
input_length<-2032
flank_length<-(input_length - output_length) /2
#experimental data information
#Btbd11 enhancer (OS/S)
coop_perturbs.path<-'tsv/genomic/crispr/crispr_coop_predictions.tsv.gz'
coop_perturbs_w_bias.path<-'tsv/genomic/crispr/crispr_coop_predictions_with_bias.tsv.gz'
coop_scenarios.path<-'tsv/genomic/crispr/crispr_coop_scenarios.tsv.gz'
coop_chrom<-'chr10'
coop_genomic_window<-c(85539375, 85539800)
coop_genomic_window_short<-c(85539500,85539750)
crispr_coop_atac_reps.path<-Sys.glob('bw/mesc_Btbd11_scenario_WT_*_atac_*_cutsites.bw') %>% grep('distcontrol', ., invert = T, value = T)
crispr_coop_atac_combined.path<-Sys.glob('bw/mesc_Btbd11_scenario_WT_*_atac_combined_normalized.bw') %>% grep('distcontrol', ., invert = T, value = T)
#Akr1cl enhancer (S/S)
context_perturbs.path<-'tsv/genomic/crispr/crispr_context_predictions.tsv.gz'
context_perturbs_w_bias.path<-'tsv/genomic/crispr/crispr_context_predictions_with_bias.tsv.gz'
context_scenarios.path<-'tsv/genomic/crispr/crispr_context_scenarios.tsv.gz'
context_chrom<-'chr1'
context_genomic_window<-c(65037500, 65038000)
crispr_context_atac_reps.path<-c(Sys.glob('bw/mesc_Akr1cl_scenario_context_*_atac_*_cutsites.bw'),
Sys.glob('bw/mesc_Btbd11_scenario_WT_coop_WT_atac_*_cutsites.bw'))
crispr_context_nexus_reps.path<-c(Sys.glob('bw/mesc_R1_*sox2_nexus_*_positive.bw'),
Sys.glob('bw/mesc_Akr1cl_scenario_context_*sox2_nexus_*_positive.bw')) %>%
.[!grepl('combined|normalized', .)]
crispr_context_atac_combined.path<-c(Sys.glob('bw/mesc_Akr1cl_scenario_context_*_atac_combined_normalized.bw'),
'bw/mesc_Btbd11_scenario_WT_coop_WT_atac_combined_normalized.bw')
crispr_context_nexus_combined.path<-c(Sys.glob('bw/mesc_Akr1cl_scenario_context_mut_*_sox2_nexus_combined_normalized_positive.bw'),
'bw/mesc_R1_sox2_nexus_combined_normalized_positive.bw')
#general paths
motifs.path<-'tsv/mapped_motifs/all_instances_curated_0based_w_perturb.tsv.gz'
motifs.df<-readr::read_tsv(motifs.path)
atac_regions.path<-'narrowpeak/mesc_atac_peaks.narrowPeak'
sox2_regions.path<-'narrowpeak/mesc_sox2_nexus_peaks.narrowPeak'
dev_enhancers.path<-'../../public/published/Avsec_2021_et_al/mm10_enhancer_regions.xlsx'
Read in developmental enhancer information.
dev_enhancers.gr<-readxl::read_xlsx(dev_enhancers.path) %>%
tidyr::separate(col = 'mm10_coordinates_curated', into = c('seqnames','coord'), sep = ':') %>%
tidyr::separate(col = 'coord', into = c('start','end'), sep = '-') %>%
dplyr::filter(!is.na(start), ) %>%
makeGRangesFromDataFrame(keep.extra.columns = T, seqnames.field = 'seqnames', start.field = 'start', end.field = 'end')
Custom function to compare 2-way interacting DESeq2 model factors for significance. Found at ( https://www.r-bloggers.com/2024/05/a-guide-to-designs-and-contrasts-in-deseq2/).
#Kudos to: https://www.r-bloggers.com/2024/05/a-guide-to-designs-and-contrasts-in-deseq2/
contraster <- function(dds, # should contain colData and design
group1, # list of character vectors each with 2 or more items
group2, # list of character vectors each with 2 or more items
weighted = F
){
mod_mat <- model.matrix(design(dds), colData(dds)) #create matrix from the DESeq2 model
grp1_rows <- list()
grp2_rows <- list()
for(i in 1:length(group1)){
grp1_rows[[i]] <- colData(dds)[[group1[[i]][1]]] %in% group1[[i]][2:length(group1[[i]])] #Subselect DESeq2 model based on desired grouping indexes
}
for(i in 1:length(group2)){
grp2_rows[[i]] <- colData(dds)[[group2[[i]][1]]] %in% group2[[i]][2:length(group2[[i]])] #Subselect DESeq2 model based on desired grouping indexes
}
#Remove any observations that aren't shared
grp1_rows <- Reduce(function(x, y) x & y, grp1_rows)
grp2_rows <- Reduce(function(x, y) x & y, grp2_rows)
mod_mat1 <- mod_mat[grp1_rows, ,drop=F]
mod_mat2 <- mod_mat[grp2_rows, ,drop=F]
if(!weighted){
mod_mat1 <- mod_mat1[!duplicated(mod_mat1),,drop=F]
mod_mat2 <- mod_mat2[!duplicated(mod_mat2),,drop=F]
}
return(colMeans(mod_mat1)-colMeans(mod_mat2))
}
perturbs.df<-readr::read_tsv(coop_perturbs_w_bias.path) %>% dplyr::mutate(state = factor(state, c('AB','A','B','null')))
scenarios.df<-readr::read_tsv(coop_scenarios.path) %>%
dplyr::mutate(scenario_state = paste0(scenario, '_', state))
perturbs.df %>%
dplyr::group_by(scenario, state, model_name) %>%
dplyr::summarize(sum_pred = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = sum_pred)
## # A tibble: 30 × 6
## # Groups: scenario [3]
## scenario model_name AB A B null
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 WT_coop atac_0h_fold1 52809. 29734. 44686. 25960.
## 2 WT_coop atac_12h_fold1 49601. 49302. 41545. 41118.
## 3 WT_coop atac_15h_fold1 22530. 23674. 20181. 20932.
## 4 WT_coop atac_3h_fold1 78326. 57549. 72700. 53742.
## 5 WT_coop atac_6h_fold1 71464. 61467. 59353. 52740.
## 6 WT_coop atac_9h_fold1 55506. 56850. 51031. 52431.
## 7 WT_coop atac_wt_fold1 11774. 3424. 9278. 3137.
## 8 WT_coop atac_wt_fold2 3959. 2011. 2641. 1816.
## 9 WT_coop atac_wt_fold3 10097. 3348. 5990. 2233.
## 10 WT_coop bpnet_osknz_fold1 37947. 15303. 14973. 9639.
## # ℹ 20 more rows
For perturbations and experiments, what are the differences between the different scenarios and states?
#Plot perturbation summaries
perturbs_reads.plot<-rbind(
perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>% dplyr::mutate(range = 'full_window'),
perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window_short[1]:coop_genomic_window_short[2])) %>% dplyr::mutate(range = '250bp_window')
) %>%
dplyr::group_by(scenario, state, range) %>%
dplyr::summarize(sum_pred = sum(pred)) %>%
ggplot(., aes(x = state, y = sum_pred))+
geom_bar(stat = 'identity')+
facet_grid(scenario ~ range)+
theme_classic()
coop.df<-rbind(
perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>% dplyr::mutate(range = 'full_window'),
perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window_short[1]:coop_genomic_window_short[2])) %>% dplyr::mutate(range = '250bp_window')
) %>%
dplyr::group_by(state, scenario, range) %>%
dplyr::summarize(preds = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = preds) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
log_add_coop = round(log(add_coop), 2),
AB_vs_B = AB/B,
log_AB_vs_B = log(AB_vs_B))
print(coop.df)
## # A tibble: 6 × 10
## # Groups: scenario [3]
## scenario range AB A B null add_coop log_add_coop AB_vs_B
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 WT_coop 250bp_window 3848. 817. 2760. 534. 1.32 0.28 1.39
## 2 WT_coop full_window 5135. 1098. 3685. 760. 1.34 0.29 1.39
## 3 dist_coop 250bp_window 1489. 724. 1022. 481. 1.29 0.25 1.46
## 4 dist_coop full_window 1903. 917. 1351. 637. 1.27 0.24 1.41
## 5 enh_coop 250bp_window 2380. 817. 1925. 534. 1.1 0.1 1.24
## 6 enh_coop full_window 3147. 1098. 2527. 760. 1.13 0.12 1.25
## # ℹ 1 more variable: log_AB_vs_B <dbl>
perturbs_coop.plot<-ggplot(coop.df, aes(x = scenario, y = log_add_coop))+
geom_bar(stat = 'identity')+
facet_grid(. ~ range)+
theme_classic()
perturbs.plot<-perturbs_reads.plot + perturbs_coop.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_summary.png'), perturbs.plot, height = 6, width = 7)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_summary.pdf'), perturbs.plot, height = 6, width = 7)
Print additional scenarios to verify our cooperativity.
#Assess Oct4 cooperativity in these different scenarios
oct4_coop.df<-perturbs.df %>%
dplyr::filter(model_name%in%c('bpnet_osknz_fold1', 'atac_wt_fold1'),
genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>%
dplyr::group_by(task, scenario, state) %>%
dplyr::summarize(preds = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = preds) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
dir_coop = round((AB - B + null)/(A), 2),
log_add_coop = round(log(add_coop), 2))
print(oct4_coop.df)
## # A tibble: 18 × 9
## # Groups: task, scenario [18]
## task scenario AB A B null add_coop dir_coop log_add_coop
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 atac WT_coop 5135. 1098. 3685. 760. 1.34 2.01 0.29
## 2 atac dist_coop 1903. 917. 1351. 637. 1.27 1.3 0.24
## 3 atac enh_coop 3147. 1098. 2527. 760. 1.13 1.26 0.12
## 4 klf4 WT_coop 1854. 680. 826. 493. 2.62 2.24 0.96
## 5 klf4 dist_coop 501. 487. 357. 340. 0.98 0.99 -0.02
## 6 klf4 enh_coop 1564. 680. 725. 493. 2.56 1.96 0.94
## 7 nanog WT_coop 11906. 4972. 2519. 1594. 2.4 2.21 0.88
## 8 nanog dist_coop 2478. 1963. 1007. 753. 1.18 1.13 0.17
## 9 nanog enh_coop 7478. 4972. 1883. 1594. 1.6 1.45 0.47
## 10 oct4 WT_coop 1485. 479. 694. 345. 2.36 2.37 0.86
## 11 oct4 dist_coop 383. 354. 298. 269. 1 1 0
## 12 oct4 enh_coop 2231. 479. 1101. 345. 2.12 3.08 0.75
## 13 sox2 WT_coop 936. 395. 312. 192. 2.31 2.07 0.84
## 14 sox2 dist_coop 269. 251. 149. 138. 1.05 1.03 0.05
## 15 sox2 enh_coop 1024. 395. 337. 192. 2.4 2.23 0.88
## 16 zic3 WT_coop 6030. 2200. 1686. 920. 2.5 2.39 0.92
## 17 zic3 dist_coop 1325. 1226. 585. 524. 1.05 1.03 0.05
## 18 zic3 enh_coop 3950. 2200. 1304. 920. 1.82 1.62 0.6
#Assess all cooperativity when Zic3 is absent
coop_w_zic3.df<-readr::read_tsv('tsv/genomic/crispr/crispr_coop_predictions_with_zic3_mut_with_bias.tsv.gz') %>% dplyr::mutate(state = factor(state, c('AB','A','B','null'))) %>%
dplyr::filter(model_name%in%c('bpnet_osknz_fold1', 'atac_wt_fold1'),
genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>%
dplyr::group_by(task, scenario, state) %>%
dplyr::summarize(preds = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = preds) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
dir_coop = round((AB - B + null)/(A), 2),
log_add_coop = round(log(add_coop), 2))
print(coop_w_zic3.df)
## # A tibble: 18 × 9
## # Groups: task, scenario [18]
## task scenario AB A B null add_coop dir_coop log_add_coop
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 atac WT_coop 4306. 775. 2914. 520. 1.43 2.47 0.36
## 2 atac dist_coop 1371. 589. 909. 400. 1.39 1.46 0.33
## 3 atac enh_coop 2741. 775. 1915. 520. 1.35 1.74 0.3
## 4 klf4 WT_coop 963. 275. 467. 196. 2.19 2.51 0.78
## 5 klf4 dist_coop 256. 251. 178. 176. 1.05 1.02 0.05
## 6 klf4 enh_coop 884. 275. 434. 196. 2.17 2.35 0.77
## 7 nanog WT_coop 4672. 1298. 1414. 501. 2.44 2.9 0.89
## 8 nanog dist_coop 912. 722. 451. 355. 1.2 1.13 0.18
## 9 nanog enh_coop 3540. 1298. 1229. 501. 1.99 2.17 0.69
## 10 oct4 WT_coop 803. 201. 383. 149. 2.29 2.84 0.83
## 11 oct4 dist_coop 199. 185. 149. 139. 1.07 1.02 0.07
## 12 oct4 enh_coop 1541. 201. 746. 149. 2.15 4.71 0.77
## 13 sox2 WT_coop 484. 159. 186. 86.5 2.31 2.42 0.84
## 14 sox2 dist_coop 138. 130. 80.0 76.7 1.08 1.04 0.08
## 15 sox2 enh_coop 647. 159. 239. 86.5 2.48 3.1 0.91
## 16 zic3 WT_coop 691. 186. 272. 96.3 2.24 2.77 0.81
## 17 zic3 dist_coop 150. 137. 83.2 77.0 1.11 1.05 0.1
## 18 zic3 enh_coop 588. 186. 246. 96.3 2.05 2.35 0.72
experimental_scenarios<-c('WT')
experimental_reads.df<-lapply(experimental_scenarios, function(y){
message('CRISPR scenario: ', y)
if(y=='dist'){reps<-2:3} else{reps<-1:2}
reads.df<-lapply(reps, function(z){
#Define files
atac_exp.bw.list<-list(
'AB' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_', z, '_cutsites_normalized.bw'),
'A' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_', z, '_cutsites_normalized.bw'),
'B' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_', z, '_cutsites_normalized.bw'),
'null' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_', z, '_cutsites_normalized.bw')
)
if(y=='dist'){atac_exp.bw.list$AB<-paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_', z, '_cutsites_normalized.bw')}
#Collect reads
atac_reads.df<-lapply(names(atac_exp.bw.list), function(x){
#Collect reads
df<-rbind(
lapply(names(atac_exp.bw.list), function(x){
gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2]))
df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
state = x,
scenario = y,
range = 'full_window')
}) %>% rbindlist(),
lapply(names(atac_exp.bw.list), function(x){
gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window_short[1], end = coop_genomic_window_short[2]))
df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
state = x,
scenario = y,
range = '250bp_window')
}) %>% rbindlist()
)
}) %>% rbindlist()
}) %>% rbindlist()
}) %>% rbindlist()
#Collect summary values
reads_summary.df<-experimental_reads.df %>%
dplyr::group_by(state, scenario, range) %>%
dplyr::summarize(median_reads = median(reads)) %>%
dplyr::mutate(state = factor(state, c('AB','A','B','null')))
exp_reads.plot<-ggplot()+
geom_bar(data = reads_summary.df, aes(x = state, y = median_reads), stat = 'identity')+
geom_point(data = experimental_reads.df, aes(x = state, y = reads))+
scale_y_continuous(name = 'RPM norm. observed ATAC-seq reads')+
scale_x_discrete(name = 'CRISPR line')+
facet_grid(scenario ~ range)+
ggtitle('Observed reads')+
theme_classic()
#Collect cooperativity values from the pooled samples
pooled_reads.df<-lapply(experimental_scenarios, function(y){
message('CRISPR scenario: ', y)
#Define files
atac_exp.bw.list<-list(
'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_cutsites_combined_normalized.bw'),
'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_cutsites_combined_normalized.bw'),
'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_cutsites_combined_normalized.bw'),
'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_cutsites_combined_normalized.bw')
)
if(y=='dist'){atac_exp.bw.list$AB<-paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_cutsites_combined_normalized.bw')}
#Collect reads
atac_reads.df<-rbind(
lapply(names(atac_exp.bw.list), function(x){
gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2]))
df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
state = x,
scenario = y,
range = 'full_window')
}) %>% rbindlist(),
lapply(names(atac_exp.bw.list), function(x){
gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window_short[1], end = coop_genomic_window_short[2]))
df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
state = x,
scenario = y,
range = '250bp_window')
}) %>% rbindlist()
)
}) %>% rbindlist()
coop_summary.df<-pooled_reads.df %>%
tidyr::pivot_wider(names_from = state, values_from = reads) %>%
dplyr::group_by(range) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
log_add_coop = round(log(add_coop), 2),
AB_vs_B = AB/B,
log_AB_vs_B = log(AB_vs_B))
print(coop_summary.df)
## # A tibble: 2 × 10
## # Groups: range [2]
## scenario range AB A B null add_coop log_add_coop AB_vs_B
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 WT full_window 20.3 13.1 13.3 8.28 1.22 0.2 1.52
## 2 WT 250bp_window 15.5 9.97 10.3 6.09 1.16 0.15 1.50
## # ℹ 1 more variable: log_AB_vs_B <dbl>
exp_coop.plot<-ggplot(coop_summary.df %>% dplyr::select(scenario, log_add_coop), aes(x = 1, y = log_add_coop))+
geom_bar(stat = 'identity')+
facet_grid(scenario ~ range)+
scale_y_continuous(name = 'Cooperativity')+
ggtitle('Cooperativity')+
theme_classic() +theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())
exp.plot<-exp_reads.plot + exp_coop.plot + plot_layout(widths = c(2, 1))
exp.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_summary.png'), exp.plot, height = 7, width = 7)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_summary.pdf'), exp.plot, height = 7, width = 7)
Show how cooperativity changes based on the window that we explore across. There is an upstream cluster of motifs.
coord_plot<-motifs.df %>% dplyr::filter(region_id==97554 | region_id==97555) %>%
dplyr::select(motif, start, end) %>%
ggplot(.)+
geom_hline(yintercept = 0)+
geom_rect(aes(xmin = start, xmax = end, ymin = -1, ymax = 1))+
geom_text(aes(x = start, y = 2, label = motif))+
scale_x_continuous(limits = c(coop_genomic_window[1], coop_genomic_window[1]+1000)) +
theme_classic()
ggsave(paste0(figure_filepath, '/Btbd11_coordinate_range.png'), coord_plot, height = 3, width = 12)
ggsave(paste0(figure_filepath, '/Btbd11_coordinate_range.pdf'), coord_plot, height = 3, width = 12)
#Collect cooperativity values from the pooled samples
range_exp.df<-lapply(experimental_scenarios, function(y){
message('CRISPR scenario: ', y)
#Define files
atac_exp.bw.list<-list(
'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_cutsites_combined_normalized.bw'),
'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_cutsites_combined_normalized.bw'),
'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_cutsites_combined_normalized.bw'),
'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_cutsites_combined_normalized.bw')
)
#Collect reads
mclapply(seq(200, 1200, 100), function(z){
lapply(names(atac_exp.bw.list), function(x){
gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2])) %>% GenomicRanges::resize(., z, 'start')
if(end(gr)>max(perturbs.df$genomic_position)) {keep = F} else {keep = T}
df<-data.frame(exp_reads = regionSums(gr, atac_exp.bw.list[[x]]),
pred_reads = perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1',
task == 'atac',
state == x,
scenario == paste0(y, '_coop'),
genomic_position %in% start(gr):end(gr)) %>%
dplyr::mutate(keep = keep) %>% dplyr::mutate(pred_keep = ifelse(keep, pred, NA)) %>% .$pred_keep %>% sum,
state = x,
scenario = y,
range = z)
return(df)
}) %>% rbindlist()
}, mc.cores = 6) %>% rbindlist()
}) %>% rbindlist()
range_coop_summary.df<-range_exp.df %>%
dplyr::filter(scenario == 'WT') %>%
tidyr::pivot_longer(cols = c('exp_reads', 'pred_reads'), names_to = 'type', values_to = 'reads') %>%
tidyr::pivot_wider(names_from = state, values_from = reads) %>%
dplyr::group_by(range, type) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
log_add_coop = round(log(add_coop), 2),
AB_vs_B = AB/B,
log_AB_vs_B = log(AB_vs_B))
print(range_coop_summary.df)
## # A tibble: 22 × 11
## # Groups: range, type [22]
## scenario range type AB A B null add_coop log_add_coop
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 WT 200 exp_reads 6.27 3.81 3.35e0 2.66e0 1.96 0.67
## 2 WT 200 pred_reads 1153. 270. 7.65e2 2.01e2 1.5 0.41
## 3 WT 300 exp_reads 14.0 8.81 8.73e0 6.03e0 1.45 0.37
## 4 WT 300 pred_reads 3142. 688. 2.22e3 4.78e2 1.36 0.31
## 5 WT 400 exp_reads 19.3 12.4 1.26e1 7.72e0 1.21 0.19
## 6 WT 400 pred_reads 4762. 1025. 3.41e3 7.03e2 1.34 0.29
## 7 WT 500 exp_reads 23.8 15.9 1.62e1 1.05e1 1.21 0.19
## 8 WT 500 pred_reads 6132. 1366. 4.47e3 1.01e3 1.34 0.29
## 9 WT 600 exp_reads 30.0 21.1 2.21e1 1.52e1 1.16 0.15
## 10 WT 600 pred_reads 7621. 1878. 5.72e3 1.53e3 1.34 0.29
## # ℹ 12 more rows
## # ℹ 2 more variables: AB_vs_B <dbl>, log_AB_vs_B <dbl>
range.plot<-ggplot(range_coop_summary.df, aes(x = range, y = log_add_coop, color = type))+
geom_line()+
geom_point()+
scale_y_continuous(name = 'ln(add_coop.)')+
scale_x_continuous(name = 'Considered coop. region (bp)\naround S/OS motif pair')+
theme_classic()
range.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_ranges.png'), range.plot, height = 3, width = 5)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_ranges.pdf'), range.plot, height = 3, width = 5)
Perform statistics on the differences.
#Do statistics on these values for significance
stats.df<-lapply(experimental_scenarios, function(x){
lapply(experimental_reads.df$state %>% unique, function(y){
ttest<-t.test(experimental_reads.df %>% dplyr::filter(state=='AB', scenario==x) %>% .$reads,
experimental_reads.df %>% dplyr::filter(state==y, scenario==x) %>% .$reads,
alternative = 'greater')
message(x, y)
print(ttest)
data.frame(
pval = round(ttest$p.value, 3),
scenario = x,
state = y,
comparison = paste0('AB_vs_', y)
)
}) %>% rbindlist()
}) %>% rbindlist()
##
## Welch Two Sample t-test
##
## data: experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 0, df = 30, p-value = 0.5
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -1.4887 Inf
## sample estimates:
## mean of x mean of y
## 17.87743 17.87743
##
## Welch Two Sample t-test
##
## data: experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 8.6248, df = 25.639, p-value = 2.357e-09
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 5.105854 Inf
## sample estimates:
## mean of x mean of y
## 17.87743 11.51212
##
## Welch Two Sample t-test
##
## data: experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 8.1444, df = 25.964, p-value = 6.38e-09
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 4.782816 Inf
## sample estimates:
## mean of x mean of y
## 17.87743 11.82756
##
## Welch Two Sample t-test
##
## data: experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 15.7, df = 20.951, p-value = 2.319e-13
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 9.524355 Inf
## sample estimates:
## mean of x mean of y
## 17.877425 7.180516
print(stats.df)
## pval scenario state comparison
## <num> <char> <char> <char>
## 1: 0.5 WT AB AB_vs_AB
## 2: 0.0 WT A AB_vs_A
## 3: 0.0 WT B AB_vs_B
## 4: 0.0 WT null AB_vs_null
Smooth the plots.
acc_plots.list<-mclapply(perturbs.df$scenario %>% unique, function(x){
p.df<-perturbs.df %>% dplyr::filter(scenario==x, grepl('wt', model_name))
s.df<-scenarios.df %>% dplyr::filter(scenario==x)
#Calculate cooperativity value
log_add_coop<-p.df %>%
dplyr::group_by(state) %>%
dplyr::summarize(preds = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = preds) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
log_add_coop = round(log(add_coop), 2))
acc.plot<-ggplot(p.df) +
geom_vline(xintercept = s.df$genomic_center_OS, linetype = 'dashed')+
geom_vline(xintercept = s.df$genomic_center_S, linetype = 'dashed')+
stat_smooth(aes(x = genomic_position, y = pred, color = state), method = 'loess', span = .2)+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
# scale_x_continuous(name = paste0('Genomic position (bp)'))+
scale_y_continuous(name = 'Predicted ATAC-seq')+
scale_color_manual(name = 'State',values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
ggtitle(p.df$scenario %>% unique, subtitle = paste0('Add coop:', log_add_coop$add_coop, ', Log add coop:', log_add_coop$log_add_coop))+
facet_grid(. ~ model_name) +
theme_classic()
return(acc.plot)
})
acc_plots.plot<-wrap_plots(acc_plots.list)+plot_layout(ncol = 1)
acc_plots.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_smoothed.png'), acc_plots.plot, width = 16, height = 10)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_smoothed.pdf'), acc_plots.plot, width = 16, height = 10)
Plot raw data as well.
acc_plots.list<-mclapply(perturbs.df$scenario %>% unique, function(x){
p.df<-perturbs.df %>% dplyr::filter(scenario==x, grepl('wt', model_name))
s.df<-scenarios.df %>% dplyr::filter(scenario==x)
#Calculate cooperativity value
log_add_coop<-p.df %>%
dplyr::group_by(state) %>%
dplyr::summarize(preds = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = preds) %>%
dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
log_add_coop = round(log(add_coop), 2))
acc.plot<-ggplot(p.df) +
geom_vline(xintercept = s.df$genomic_center_OS, linetype = 'dashed')+
geom_vline(xintercept = s.df$genomic_center_S, linetype = 'dashed')+
geom_line(aes(x = genomic_position, y = pred, color = state))+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
# scale_x_continuous(name = paste0('Genomic position (bp)'))+
scale_y_continuous(name = 'Predicted ATAC-seq', limits = c(0, 50))+
scale_color_manual(name = 'State',values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
ggtitle(p.df$scenario %>% unique, subtitle = paste0('Add coop:', log_add_coop$add_coop, ', Log add coop:', log_add_coop$log_add_coop))+
facet_grid(. ~ model_name) +
theme_classic()
return(acc.plot)
})
acc_plots.plot<-wrap_plots(acc_plots.list)+plot_layout(ncol = 1)
acc_plots.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_raw.png'), acc_plots.plot, width = 16, height = 10)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_raw.pdf'), acc_plots.plot, width = 16, height = 10)
experimental_scenarios<-c('WT')
acc_plots.list<-lapply(experimental_scenarios, function(y){
message('CRISPR scenario: ', y)
atac_exp.bw.list<-list(
'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_combined_normalized.bw'),
'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_combined_normalized.bw'),
'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_combined_normalized.bw'),
'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_combined_normalized.bw')
)
if(y=='dist'){atac_exp.bw.list$AB<-paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_combined_normalized.bw')}
atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
standard_metapeak_matrix(GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2])),
atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
as.data.frame() %>% transpose() %>%
dplyr::rename(pred = V1) %>%
dplyr::mutate(genomic_position = coop_genomic_window[1]:coop_genomic_window[2],
sample = x)
}) %>% rbindlist() %>%
dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))
atac_exp.plot<-ggplot(atac_exp.df) +
geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_OS, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_S, linetype = 'dashed')+
geom_line(aes(x = genomic_position, y = pred, color = sample))+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments', limits = c(0, 3.2))+
scale_color_manual(name = 'State', values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
ggtitle(y)+
theme_classic()
atac_exp.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_fragments.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_fragments.pdf'), atac_exp.plot, width = 7, height = 3)
print(atac_exp.df %>% dplyr::group_by(sample) %>%
dplyr::summarize(sum_reads = sum(pred)) %>%
tidyr::pivot_wider(names_from = sample, values_from = sum_reads) %>%
dplyr::mutate(coop = log((AB-null)/(A - null + B - null))))
})
## # A tibble: 1 × 5
## AB A B null coop
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 904. 663. 595. 470. 0.314
experimental_scenarios<-c('WT')
acc_plots.list<-lapply(experimental_scenarios, function(y){
message('CRISPR scenario: ', y)
atac_exp.bw.list<-list(
'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_cutsites_combined_normalized.bw'),
'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_cutsites_combined_normalized.bw'),
'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_cutsites_combined_normalized.bw'),
'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_cutsites_combined_normalized.bw')
)
if(y=='dist'){atac_exp.bw.list$AB<-paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_cutsites_combined_normalized.bw')}
atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
standard_metapeak_matrix(GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2])),
atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
as.data.frame() %>% transpose() %>%
dplyr::rename(pred = V1) %>%
dplyr::mutate(genomic_position = coop_genomic_window[1]:coop_genomic_window[2],
sample = x)
}) %>% rbindlist() %>%
dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))
atac_exp.plot<-ggplot(atac_exp.df) +
geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_OS, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_S, linetype = 'dashed')+
stat_smooth(aes(x = genomic_position, y = pred, color = sample), method = 'loess', span = .2)+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
scale_y_continuous(name = 'RPM ATAC-seq cutsites(smooth)')+
scale_color_manual(name = 'State',values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
ggtitle(y)+
theme_classic()
atac_exp.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_cutsites_smooth.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_cutsites_smooth.pdf'), atac_exp.plot, width = 7, height = 3)
})
curated_enhancer_boundaries.gr<-GRanges(coop_chrom, IRanges(coop_genomic_window[1], coop_genomic_window[2]))
Generate contribution scores of mapped motifs.
curated_enhancer.df<-data.frame(
acc_contrib = standard_metapeak_matrix(regions.gr = curated_enhancer_boundaries.gr, sample.cov = 'shap/atac_wt_fold1_atac_counts.bw',
keep_region_coordinates = T) %>% t(),
seq = getSeq(bsgenome, curated_enhancer_boundaries.gr, as.character = T) %>% stringr::str_split_1(pattern = ''),
position = start(curated_enhancer_boundaries.gr):end(curated_enhancer_boundaries.gr)
)
# Generate sequence logo
acc_contrib.plot<-curated_enhancer.df%>%
tidyr::pivot_wider(names_from = seq, values_from = acc_contrib) %>%
dplyr::select(A, C, G, T) %>% as.matrix() %>% t() %>%
ggseqlogo(., method='custom', seq_type='dna') +
scale_y_continuous(name = 'Acc. contribution.')+
scale_x_discrete(name = 'Genomic position (chr10, bp)')+
ggtitle('CRISCR enhancer')
acc_contrib.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_candidate_contrib.png'), acc_contrib.plot, height = 2, width = 20)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_candidate_contrib.pdf'), acc_contrib.plot, height = 2, width = 20)
seqs.fa<-seqinr::read.fasta('fasta/crispr_coop_seqs.fa')
filler<-mclapply(Sys.glob('shap/crispr/crispr_coop_*_counts.h5'), function(y){
contrib.h5<-rhdf5::h5read(y, '/')
hyp_scores.list<-apply(contrib.h5$hyp_scores, 3, function(x){
as.data.frame(x) %>% transpose()
})
input_seqs.list<-apply(contrib.h5$input_seqs, 3, function(x){
df<-as.data.frame(x)
df<-dplyr::mutate_all(df, function(x) as.numeric(as.character(x)))
return(df %>% transpose)
})
contrib.list<-lapply(1:length(hyp_scores.list), function(x){
mat<-input_seqs.list[[x]] * hyp_scores.list[[x]]
colnames(mat)<-c('A','C','G','T')
return(mat %>% t())
})
names(contrib.list)<-names(seqs.fa)
#Visualize contribution scores
contrib.plot<- ggseqlogo(contrib.list, method='custom', seq_type='dna') +
scale_y_continuous(name = 'Contribution') +
scale_x_continuous(limits = c(700, 1200)) +
facet_wrap(~seq_group, ncol=4, scales='free_x') +
theme(axis.line = element_line(), axis.ticks = element_line())+
ggtitle(y)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_contribution_', basename(y), '.png'), contrib.plot, width = 16, height = 10)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_contribution_', basename(y), '.pdf'), contrib.plot, width = 16, height = 10)
}, mc.cores = 6)
Confirm motif sequences are indeed high/low-affinity
readr::read_tsv('tsv/insilico/marginalizations/crispr/btbd11_marginalizations_atac_wt.tsv.gz')
## # A tibble: 2 × 6
## seqA motifA seqB motifB state atac
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 ATTAGCATTACAATT all none empty A 6.80
## 2 none all none empty null 6.03
readr::read_tsv('tsv/insilico/marginalizations/crispr/btbd11_marginalizations_bpnet_osknz.tsv.gz')
## # A tibble: 2 × 10
## seqA motifA seqB motifB state oct4 sox2 klf4 nanog zic3
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ATTAGCATTACAATT all none empty A 6.14 5.16 5.62 6.22 4.79
## 2 none all none empty null 5.29 4.61 5.31 5.58 4.39
invivo_marg.df<-readr::read_tsv('tsv/insilico/marginalizations/motifs/all_marginalizations_expanded.tsv.gz') %>%
dplyr::filter(seq %in% c(motifs.df %>% dplyr::filter(region_id == 97547) %>% .$seq, 'ATTAGCATTACAATT'))
invivo_marg.df %>% as.data.frame
## seq oct4_marg oct4_inj sox2_marg sox2_inj klf4_marg klf4_inj
## 1 GCTCAGCTGGG 0.0036268 5.295044 0.0027845 4.616916 0.0043634 5.313706
## nanog_marg nanog_inj zic3_marg zic3_inj atac_wt_marg atac_wt_inj atac_wt_null
## 1 0.0039029 5.586322 0.1526327 4.539115 0.0361964 6.061814 6.025618
## atac_0h_marg atac_0h_inj atac_0h_null atac_3h_marg atac_3h_inj atac_3h_null
## 1 0.026695 9.447199 9.420504 0.036211 9.683814 9.647603
## atac_6h_marg atac_6h_inj atac_6h_null atac_9h_marg atac_9h_inj atac_9h_null
## 1 0.030346 9.855091 9.824745 0.031047 9.630625 9.599578
## atac_12h_marg atac_12h_inj atac_12h_null atac_15h_marg atac_15h_inj
## 1 0.022422 9.730329 9.707907 0.025415 9.496419
## atac_15h_null motif
## 1 9.471004 Zic3
motifs.df %>% dplyr::filter(seq=='ATTATCATTATAATT') %>% nrow(.)
## [1] 1
Between all other motifs, what does our model think is happening?
pairs.df<-readr::read_tsv('tsv/genomic/all_pairs_curated_0based_w_perturb.tsv.gz')
pairs.df %>% dplyr::filter(region_id == 97554) %>% dplyr::select(motif.x, pattern_center.x, width.x)
## # A tibble: 6 × 3
## motif.x pattern_center.x width.x
## <chr> <dbl> <dbl>
## 1 Klf4 560 11
## 2 Klf4 560 11
## 3 Nanog 536 8
## 4 Nanog 536 8
## 5 Sox2 441 9
## 6 Sox2 441 9
pairs.df %>% dplyr::filter(region_id == 97554) %>%
dplyr::select(motif.x, motif.y,
`atac_wt/atac/hAB/pred_sum`, `atac_wt/atac/hA/pred_sum`, `atac_wt/atac/hB/pred_sum`, `atac_wt/atac/null/pred_sum`,
`bpnet_osknz/oct4/hAB/pred_sum`, `bpnet_osknz/oct4/hA/pred_sum`, `bpnet_osknz/oct4/hB/pred_sum`, `bpnet_osknz/oct4/null/pred_sum`) %>%
dplyr::mutate(atac_coop = ((`atac_wt/atac/hAB/pred_sum`-`atac_wt/atac/null/pred_sum`)/(`atac_wt/atac/hA/pred_sum`+`atac_wt/atac/hB/pred_sum` - 2*`atac_wt/atac/null/pred_sum`)),
oct4_coop = ((`bpnet_osknz/oct4/hAB/pred_sum` - `bpnet_osknz/oct4/hB/pred_sum` + `bpnet_osknz/oct4/null/pred_sum`)/(`bpnet_osknz/oct4/hA/pred_sum`))) %>%
as.data.frame
## motif.x motif.y atac_wt/atac/hAB/pred_sum atac_wt/atac/hA/pred_sum
## 1 Klf4 Nanog 679.9903 470.2959
## 2 Klf4 Sox2 679.9903 519.5516
## 3 Nanog Klf4 679.9903 409.8858
## 4 Nanog Sox2 679.9903 519.5516
## 5 Sox2 Klf4 679.9903 409.8858
## 6 Sox2 Nanog 679.9903 470.2959
## atac_wt/atac/hB/pred_sum atac_wt/atac/null/pred_sum
## 1 409.8858 301.5631
## 2 409.8858 310.0992
## 3 470.2959 301.5631
## 4 470.2959 353.0599
## 5 519.5516 310.0992
## 6 519.5516 353.0599
## bpnet_osknz/oct4/hAB/pred_sum bpnet_osknz/oct4/hA/pred_sum
## 1 164.7988 126.8600
## 2 164.7988 166.3281
## 3 164.7988 133.3857
## 4 164.7988 166.3281
## 5 164.7988 133.3857
## 6 164.7988 126.8600
## bpnet_osknz/oct4/hB/pred_sum bpnet_osknz/oct4/null/pred_sum atac_coop
## 1 133.3857 115.6963 1.365890
## 2 133.3857 137.3726 1.196133
## 3 126.8600 115.6963 1.365890
## 4 126.8600 131.7498 1.152268
## 5 166.3281 137.3726 1.196133
## 6 166.3281 131.7498 1.152268
## oct4_coop
## 1 1.159619
## 2 1.014775
## 3 1.151810
## 4 1.020204
## 5 1.018425
## 6 1.026489
Compare CRISPR ATAC-seq samples to make sure they’re replicable. So only compare replicates here.
atac_regions.gr<-rtracklayer::import(atac_regions.path) %>%
plyranges::mutate(across_Btbd11 = ifelse(overlapsAny(., GRanges(coop_chrom, IRanges(start = coop_genomic_window[1], end = coop_genomic_window[2]))), 'Btbd11', 'none'),
across_enhancer = ifelse(overlapsAny(., dev_enhancers.gr), 'dev_enh', across_Btbd11)) %>% arrange(desc(across_enhancer))
#Collect coverage
crispr_coop_cov.df<-mclapply(crispr_coop_atac_reps.path, function(x){
regionSums(atac_regions.gr, x)
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_coop_cov.df)<-crispr_coop_atac_reps.path
#Consolidate values
crispr_coop_cov_w_coord.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_coop_cov.df) %>% arrange(desc(across_enhancer))
#Plot the sequencing depth imbalance.
plot.list<-lapply(Sys.glob('bw/mesc_Btbd11_scenario_WT_*_atac_1_cutsites.bw') %>% grep('distcontrol', ., value = T, invert = T), function(x){
crispr_coop_cov_w_coord.df$repa<-crispr_coop_cov_w_coord.df[[x]]
crispr_coop_cov_w_coord.df$repb<-crispr_coop_cov_w_coord.df[[gsub('_1_', '_2_', x)]]
plot<-ggplot(data = crispr_coop_cov_w_coord.df, aes(x = log(repa), y = log(repb)))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_coop_cov_w_coord.df %>% dplyr::filter(across_Btbd11_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
ggtitle(basename(x) %>% gsub('_atac_1_cutsites.bw', '', .))+
theme_classic() + theme(legend.position = 'none')
return(plot)
})
rep.plot<-wrap_plots(plot.list) + plot_layout(nrow = 1, ncol = 4)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_rep_comparison.png'), rep.plot, height = 6, width = 12)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_rep_comparison.pdf'), rep.plot, height = 6, width = 12)
Do our conditions and CRISPR mutants look consistent across regions of interest that are not the CRISPR locus?
filler<-lapply(1:length(dev_enhancers.gr), function(x){
metapeak.df<-mclapply(crispr_coop_atac_combined.path, function(y){
standard_metapeak(resize(dev_enhancers.gr[x], 1, 'center'), y, downstream = 501, upstream = 500) %>%
dplyr::mutate(path = y, enhancer = dev_enhancers.gr[x]$name)
}, mc.cores = 8) %>% rbindlist() %>%
dplyr::mutate(scenario_state = basename(path) %>% gsub('mesc_Btbd11_scenario_', '', .)%>% gsub('_atac_combined_normalized.bw', '', .),
genomic_position = tss_distance + start(resize(dev_enhancers.gr[x], 1, 'center'))) %>%
tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = '_coop_', remove = F) %>%
dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B','null')))
enhancer_name<-dev_enhancers.gr[x]$name
plot<-ggplot(metapeak.df, aes(x = genomic_position, y = reads, color = state))+
geom_line()+
scale_x_continuous(name = seqnames(dev_enhancers.gr[x])) +
scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments', limits = c(0, 5))+
scale_colour_manual(values = turbo(4, begin = .2, end = .8))+
facet_grid(. ~ scenario)+
ggtitle(enhancer_name)+
theme_classic()
ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Btbd11_crispr_coop_comparison.png'), plot, height = 4, width = 8)
ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Btbd11_crispr_coop_comparison.pdf'), plot, height = 4, width = 8)
return(NULL)
})
Run DESeq2 in order to measure whether CRISPR scenarios are significantly differentially enriched when we do these mutations. Since we are comparing a few features, we will contrast a couple assessments first.
#Format organization
atac_labels.df<-data.frame(filename = crispr_coop_atac_reps.path) %>%
dplyr::mutate(scenario_state = basename(crispr_coop_atac_reps.path) %>% gsub('mesc_Btbd11_scenario_', '', .) %>% gsub('_cutsites.bw', '', .)) %>%
# tidyr::separate(state, into = c('timepoint','replicate'), remove = F)
tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = '_coop_', remove = F) %>%
tidyr::separate(col = 'state', into = c('state','rep'), sep = '_atac_', remove = T) %>%
dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B','null')))
rownames(atac_labels.df)<-crispr_coop_atac_reps.path
atac_labels.df
## filename
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw
## scenario_state
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw WT_coop_mut_A_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw WT_coop_mut_A_atac_2
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw WT_coop_mut_B_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw WT_coop_mut_B_atac_2
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw WT_coop_mut_null_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw WT_coop_mut_null_atac_2
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw WT_coop_WT_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw WT_coop_WT_atac_2
## scenario state rep
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw WT A 1
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw WT A 2
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw WT B 1
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw WT B 2
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw WT null 1
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw WT null 2
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw WT WT 1
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw WT WT 2
#Consolidate counts
atac.df<-crispr_coop_cov.df
colnames(atac.df)<-crispr_coop_atac_reps.path
#Run DESeq2
dds <- DESeqDataSetFromMatrix(countData = atac.df,
colData = atac_labels.df,
design = ~ state)
dds <- DESeq(dds)
Show enrichment results.
Within each scenario, do we see significance from the WT?
deseq_comparisons.df<-lapply(c('WT'), function(x){
lapply(c('A','B','null'), function(y){
res <- results(dds, contrast = contraster(dds,
group1 = list(c("scenario", x), c("state", "WT")),
group2 = list(c("scenario", x), c("state", y)))) %>%
cbind(., atac_regions.gr %>% as.data.frame()) %>% as.data.frame %>%
dplyr::mutate(signif = cut(padj, breaks = c(0, 0.001, 0.01, 0.05, 1), include.lowest = T, labels = c('***','**','*',''))) %>%
dplyr::filter(across_enhancer == 'Btbd11') %>%
dplyr::mutate(state = y, scenario = x, comparison = paste0(x, ': WT vs ', y))
}) %>% rbindlist()
}) %>% rbindlist()
deseq_comparisons.df
## baseMean log2FoldChange lfcSE stat pvalue padj
## <num> <num> <num> <num> <num> <num>
## 1: 3693.95 0.3734261 0.06608023 5.651101 1.594232e-08 9.315719e-07
## 2: 3693.95 0.1880535 0.06588973 2.854064 4.316379e-03 1.042809e-02
## 3: 3693.95 0.6725339 0.06679151 10.069153 7.562689e-24 7.506964e-21
## seqnames start end width strand name score signalValue pValue
## <fctr> <int> <int> <int> <fctr> <char> <num> <num> <num>
## 1: chr10 85539345 85540344 1000 * <NA> 1000 8.51039 710.946
## 2: chr10 85539345 85540344 1000 * <NA> 1000 8.51039 710.946
## 3: chr10 85539345 85540344 1000 * <NA> 1000 8.51039 710.946
## qValue peak across_Btbd11 across_enhancer signif state scenario
## <num> <int> <char> <char> <fctr> <char> <char>
## 1: 706.851 500 Btbd11 Btbd11 *** A WT
## 2: 706.851 500 Btbd11 Btbd11 * B WT
## 3: 706.851 500 Btbd11 Btbd11 *** null WT
## comparison
## <char>
## 1: WT: WT vs A
## 2: WT: WT vs B
## 3: WT: WT vs null
Visualize comparison plots with statistics attached:
#Collect coverage
crispr_coop_cov_combined.df<-mclapply(crispr_coop_atac_combined.path, function(x){
log(regionSums(atac_regions.gr, x))
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_coop_cov_combined.df)<-crispr_coop_atac_combined.path
#Consolidate values
crispr_coop_cov_w_coord_combined.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_coop_cov_combined.df) %>% arrange(desc(across_enhancer))
#Plot the sequencing depth imbalance.
plot.list<-lapply(c('WT'), function(x){
compare.plot.list<-lapply(c('A','B','null'), function(y){
wt_sample<-paste0('bw/mesc_Btbd11_scenario_', x, '_coop_WT_atac_combined_normalized.bw')
mut_sample<-paste0('bw/mesc_Btbd11_scenario_', x, '_coop_mut_', y, '_atac_combined_normalized.bw')
deseq.df<-deseq_comparisons.df %>% dplyr::filter(scenario == x, state ==y) %>%
data.frame(label = paste0('p=', .$padj, ', ', .$signif))
plot<-ggplot(data = crispr_coop_cov_w_coord_combined.df, aes(x = .data[[wt_sample]], y = .data[[mut_sample]]))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_coop_cov_w_coord.df %>% dplyr::filter(across_Btbd11_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
geom_text(data = deseq.df, aes(x = 8, y = 4, label = label))+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0(' log-reads across peaks'))+
scale_y_continuous(name = paste0(' log-reads across peaks'))+
ggtitle(paste0('Scenario: ', x, ', WT vs ', y, ' comparisons'))+
theme_classic() + theme(legend.position = 'none')
return(plot)
})
compare.plot<-wrap_plots(compare.plot.list) + plot_layout(nrow = 1, ncol = 3)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_diff_comparison_scenario_', x, '.png'), compare.plot, height = 3, width = 9)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_diff_comparison_scenario_', x, '.pdf'), compare.plot, height = 3, width = 9)
})
perturbs.df<-readr::read_tsv(context_perturbs_w_bias.path) %>% dplyr::mutate(state = factor(state, c('AB','A','B'))) %>% dplyr::mutate(pred_stranded = ifelse(channel==0, pred, pred*-1))
scenarios.df<-readr::read_tsv(context_scenarios.path)
perturbs.df %>%
dplyr::group_by(state, model_name, task) %>%
dplyr::summarize(sum_pred = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = sum_pred)
## # A tibble: 24 × 5
## # Groups: model_name [12]
## model_name task AB A B
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 atac_0h_fold1 atac 10932. 10209. 10060.
## 2 atac_12h_fold1 atac 9436. 9158. 9335.
## 3 atac_15h_fold1 atac 8429. 8041. 8033.
## 4 atac_3h_fold1 atac 15885. 14440. 15494.
## 5 atac_6h_fold1 atac 16919. 16014. 15792.
## 6 atac_9h_fold1 atac 9779. 9550. 9241.
## 7 atac_wt_fold1 atac 1064. 613. 1313.
## 8 atac_wt_fold2 atac 1681. 1331. 1331.
## 9 atac_wt_fold3 atac 299. 242. 278.
## 10 bpnet_osknz_fold1 klf4 271. 238. 239.
## # ℹ 14 more rows
Report relative differences
results.df<-perturbs.df %>%
dplyr::group_by(state, model_name, task) %>%
dplyr::summarize(preds = sum(pred)) %>%
tidyr::pivot_wider(names_from = state, values_from = preds) %>%
dplyr::mutate(AB_vs_B = AB/B,
log_AB_vs_B = log(AB_vs_B),
AB_vs_A = AB/A,
log_AB_vs_A = log(AB_vs_A),)
print(results.df)
## # A tibble: 24 × 9
## # Groups: model_name [12]
## model_name task AB A B AB_vs_B log_AB_vs_B AB_vs_A log_AB_vs_A
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 atac_0h_f… atac 10932. 10209. 10060. 1.09 0.0832 1.07 0.0685
## 2 atac_12h_… atac 9436. 9158. 9335. 1.01 0.0108 1.03 0.0299
## 3 atac_15h_… atac 8429. 8041. 8033. 1.05 0.0482 1.05 0.0472
## 4 atac_3h_f… atac 15885. 14440. 15494. 1.03 0.0249 1.10 0.0954
## 5 atac_6h_f… atac 16919. 16014. 15792. 1.07 0.0689 1.06 0.0549
## 6 atac_9h_f… atac 9779. 9550. 9241. 1.06 0.0566 1.02 0.0236
## 7 atac_wt_f… atac 1064. 613. 1313. 0.810 -0.211 1.74 0.551
## 8 atac_wt_f… atac 1681. 1331. 1331. 1.26 0.234 1.26 0.234
## 9 atac_wt_f… atac 299. 242. 278. 1.07 0.0707 1.24 0.211
## 10 bpnet_osk… klf4 271. 238. 239. 1.14 0.127 1.14 0.128
## # ℹ 14 more rows
results.df %>% dplyr::select(model_name,task, AB_vs_A, log_AB_vs_A, AB_vs_B, log_AB_vs_B)
## # A tibble: 24 × 6
## # Groups: model_name [12]
## model_name task AB_vs_A log_AB_vs_A AB_vs_B log_AB_vs_B
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 atac_0h_fold1 atac 1.07 0.0685 1.09 0.0832
## 2 atac_12h_fold1 atac 1.03 0.0299 1.01 0.0108
## 3 atac_15h_fold1 atac 1.05 0.0472 1.05 0.0482
## 4 atac_3h_fold1 atac 1.10 0.0954 1.03 0.0249
## 5 atac_6h_fold1 atac 1.06 0.0549 1.07 0.0689
## 6 atac_9h_fold1 atac 1.02 0.0236 1.06 0.0566
## 7 atac_wt_fold1 atac 1.74 0.551 0.810 -0.211
## 8 atac_wt_fold2 atac 1.26 0.234 1.26 0.234
## 9 atac_wt_fold3 atac 1.24 0.211 1.07 0.0707
## 10 bpnet_osknz_fold1 klf4 1.14 0.128 1.14 0.127
## # ℹ 14 more rows
For perturbations and experiments, what are the differences between the different scenarios and states?
#Plot perturbation summaries
perturbs_reads.plot<-perturbs.df %>%
dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(context_genomic_window[1]:context_genomic_window[2])) %>%
dplyr::group_by(state) %>%
dplyr::summarize(sum_pred = sum(pred)) %>%
ggplot(., aes(x = state, y = sum_pred))+
geom_bar(stat = 'identity')+
coord_flip()+
theme_classic()
perturbs_reads.plot
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_perturbations_summary.png'), perturbs_reads.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_perturbations_summary.pdf'), perturbs_reads.plot, width = 7, height = 3)
reads.df<-lapply(1:2, function(z){
#Define files
atac_exp.bw.list<-list(
'AB' = paste0('../1_processing/bw/mesc_Btbd11_scenario_WT_coop_WT_atac_', z, '_cutsites_normalized.bw'),
'A' = paste0('../1_processing/bw/mesc_Akr1cl_scenario_context_mut_A_atac_', z, '_cutsites_normalized.bw'),
'B' = paste0('../1_processing/bw/mesc_Akr1cl_scenario_context_mut_B_atac_', z, '_cutsites_normalized.bw')
)
#Collect reads
atac_reads.df<-lapply(names(atac_exp.bw.list), function(x){
gr<-GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2]))
df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
state = x,
rep = z)
}) %>% rbindlist()
}) %>% rbindlist()
#Collect summary values
reads_summary.df<-reads.df %>%
dplyr::group_by(state) %>%
dplyr::summarize(median_reads = median(reads))
exp_reads.plot<-ggplot()+
geom_bar(data = reads_summary.df, aes(x = state, y = median_reads), stat = 'identity')+
geom_point(data = reads.df, aes(x = state, y = reads))+
coord_flip()+
theme_classic()
exp_reads.plot
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary.png'), exp_reads.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary.pdf'), exp_reads.plot, width = 7, height = 3)
Process relevant ChIP-nexus data, normalize it, and summarize reads:
nexus_rep.rds.list<-list(
'AB' = c('../1_processing/rdata/mesc_R1_sox2_nexus_15.granges.rds',
'../1_processing/rdata/mesc_R1_sox2_nexus_16.granges.rds'),
'A' = c('../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_1.granges.rds',
'../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_2.granges.rds',
'../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_3.granges.rds'),
'B' = c('../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_1.granges.rds',
'../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_2.granges.rds',
'../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_3.granges.rds')
)
reads.df<-mclapply(names(nexus_rep.rds.list), function(z){
#Collect reads
nexus_reads.df<-lapply(nexus_rep.rds.list[[z]], function(x){
rds<-readRDS(x) %>% resize(., 1, 'start')
total_reads<-length(rds)
gr<-GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2]))
reads_in_region<-overlapsAny(rds, gr, ignore.strand = T) %>% sum()
reads_in_region_rpm<-reads_in_region/total_reads * 1000000
df<-data.frame(reads = reads_in_region_rpm,
rep = x,
state = z)
}) %>% rbindlist()
}, mc.cores = 3) %>% rbindlist()
#Collect summary values
reads_summary.df<-reads.df %>%
dplyr::group_by(state) %>%
dplyr::summarize(median_reads = median(reads))
exp_reads.plot<-ggplot()+
geom_bar(data = reads_summary.df, aes(x = state, y = median_reads), stat = 'identity')+
geom_point(data = reads.df, aes(x = state, y = reads))+
coord_flip()+
theme_classic()
exp_reads.plot
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary_nexus.png'), exp_reads.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary_nexus.pdf'), exp_reads.plot, width = 7, height = 3)
t.test(reads.df %>% dplyr::filter(state == 'AB') %>% .$reads, reads.df %>% dplyr::filter(state == 'B') %>% .$reads)
##
## Welch Two Sample t-test
##
## data: reads.df %>% dplyr::filter(state == "AB") %>% .$reads and reads.df %>% dplyr::filter(state == "B") %>% .$reads
## t = 6.2929, df = 2.9924, p-value = 0.008167
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 5.650742 17.247514
## sample estimates:
## mean of x mean of y
## 18.184660 6.735532
t.test(reads.df %>% dplyr::filter(state == 'AB') %>% .$reads, reads.df %>% dplyr::filter(state == 'A') %>% .$reads)
##
## Welch Two Sample t-test
##
## data: reads.df %>% dplyr::filter(state == "AB") %>% .$reads and reads.df %>% dplyr::filter(state == "A") %>% .$reads
## t = 14.015, df = 1.677, p-value = 0.009796
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 10.28456 22.43036
## sample estimates:
## mean of x mean of y
## 18.18466 1.82720
Plot raw data.
acc.plot<-ggplot(perturbs.df %>% dplyr::filter(task %in% c('atac', 'sox2'), grepl('atac_wt|bpnet', model_name))) +
geom_vline(xintercept = scenarios.df$genomic_center_distal, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df$genomic_center_proximal, linetype = 'dashed')+
geom_line(aes(x = genomic_position, y = pred_stranded, color = state, group = interaction(channel, state)))+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
scale_y_continuous(name = 'Predicted ATAC-seq')+
scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
facet_grid(model_name ~ task, scales = 'free') +
theme_classic()
acc.plot
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_raw.png'), acc.plot, width = 10, height = 20)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_raw.pdf'), acc.plot, width = 10, height = 20)
Plot smoothed data
acc.plot<-ggplot(perturbs.df %>% dplyr::filter(model_name %in% c('atac_wt_fold1'))) +
geom_vline(xintercept = scenarios.df$genomic_center_distal, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df$genomic_center_proximal, linetype = 'dashed')+
stat_smooth(aes(x = genomic_position, y = pred_stranded, color = state, group = interaction(channel, state)), method = 'loess', span = .2)+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
scale_y_continuous(name = 'Predicted ATAC-seq')+
scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
facet_grid(model_name ~ task, scales = 'free') +
theme_classic()
acc.plot
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_smoothed.png'), acc.plot, width = 10, height = 5)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_smoothed.pdf'), acc.plot, width = 10, height = 5)
atac_exp.bw.list<-list(
'AB' = 'bw/mesc_Btbd11_scenario_WT_coop_WT_atac_combined_normalized.bw',
'A' = 'bw/mesc_Akr1cl_scenario_context_mut_A_atac_combined_normalized.bw',
'B' = 'bw/mesc_Akr1cl_scenario_context_mut_B_atac_combined_normalized.bw'
)
atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
standard_metapeak_matrix(GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2])),
atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
as.data.frame() %>% transpose() %>%
dplyr::rename(pred = V1) %>%
dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
sample = x)
}) %>% rbindlist() %>%
dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))
atac_exp.plot<-ggplot(atac_exp.df) +
geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
geom_line(aes(x = genomic_position, y = pred, color = sample))+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
# scale_x_continuous(name = paste0('Genomic position (bp)'))+
scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments')+
scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
theme_classic()
atac_exp.plot
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_fragments.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_fragments.pdf'), atac_exp.plot, width = 7, height = 3)
#Report relative fold change impacts
atac_exp.df %>% dplyr::group_by(sample) %>%
dplyr::summarize(sum_reads = sum(pred))
## # A tibble: 3 × 2
## sample sum_reads
## <fct> <dbl>
## 1 AB 434.
## 2 A 124.
## 3 B 376.
atac_exp.bw.list<-list(
'AB' = 'bw/mesc_Btbd11_scenario_WT_coop_WT_atac_cutsites_combined_normalized.bw',
'A' = 'bw/mesc_Akr1cl_scenario_context_mut_A_atac_cutsites_combined_normalized.bw',
'B' = 'bw/mesc_Akr1cl_scenario_context_mut_B_atac_cutsites_combined_normalized.bw'
)
atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
standard_metapeak_matrix(GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2])),
atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
as.data.frame() %>% transpose() %>%
dplyr::rename(pred = V1) %>%
dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
sample = x)
}) %>% rbindlist() %>%
dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))
atac_exp.plot<-ggplot(atac_exp.df) +
geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
geom_line(aes(x = genomic_position, y = pred, color = sample), alpha = .5)+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
# scale_x_continuous(name = paste0('Genomic position (bp)'))+
scale_y_continuous(name = 'RPM-normalized ATAC-seq cuts')+
scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
theme_classic()
atac_exp.plot
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_raw.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_raw.pdf'), atac_exp.plot, width = 7, height = 3)
atac_exp.plot<-ggplot(atac_exp.df) +
geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
stat_smooth(aes(x = genomic_position, y = pred, color = sample), method = 'loess', span = .2)+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
# scale_x_continuous(name = paste0('Genomic position (bp)'))+
scale_y_continuous(name = 'RPM-normalized ATAC-seq cuts')+
scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
theme_classic()
atac_exp.plot
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_smoothed.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_smoothed.pdf'), atac_exp.plot, width = 7, height = 3)
nexus_exp.bw.list<-list(
'AB_sox2' = list(pos = 'bw/mesc_R1_sox2_nexus_combined_normalized_positive.bw',
neg = 'bw/mesc_R1_sox2_nexus_combined_normalized_negative.bw'),
'A_sox2' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_combined_normalized_positive.bw',
neg = 'bw/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_combined_normalized_negative.bw'),
'B_sox2' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_combined_normalized_positive.bw',
neg = 'bw/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_combined_normalized_negative.bw'),
'AB_klf4' = list(pos = 'bw/mesc_R1_klf4_nexus_combined_normalized_positive.bw',
neg = 'bw/mesc_R1_klf4_nexus_combined_normalized_negative.bw'),
'A_klf4' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_A_klf4_nexus_combined_normalized_positive.bw',
neg = 'bw/mesc_Akr1cl_scenario_context_mut_A_klf4_nexus_combined_normalized_negative.bw'),
'B_klf4' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_B_klf4_nexus_combined_normalized_positive.bw',
neg = 'bw/mesc_Akr1cl_scenario_context_mut_B_klf4_nexus_combined_normalized_negative.bw')
)
nexus_exp.df<-lapply(names(nexus_exp.bw.list), function(x){
mat<-exo_metapeak_matrix(GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2])),
nexus_exp.bw.list[[x]], keep_region_coordinates = T)
df<-rbind(
mat$pos %>%
as.data.frame() %>% transpose() %>%
dplyr::rename(pred = V1) %>%
dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
sample = x,
channel = 'pos'),
mat$neg %>%
as.data.frame() %>% transpose() %>%
dplyr::rename(pred = V1) %>%
dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
sample = x,
pred = pred*-1,
channel = 'neg')
)
return(df)
}) %>% rbindlist() %>%
dplyr::mutate(sample = factor(sample, names(nexus_exp.bw.list))) %>%
tidyr::separate(sample, c('state', 'TF'), sep = '_') %>%
dplyr::mutate(state = factor(state, levels = c('AB','A','B')))
nexus_exp.plot<-ggplot(nexus_exp.df) +
geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
geom_area(aes(x = genomic_position, y = pred, fill = state, group = interaction(state, channel)))+
scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
# scale_x_continuous(name = paste0('Genomic position (bp)'))+
scale_y_continuous(name = 'RPM-normalized ChIP-nexus reads')+
scale_fill_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
facet_grid(state ~ TF)+
theme_classic()
nexus_exp.plot
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_nexus.png'), nexus_exp.plot, width = 7, height = 7)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_nexus.pdf'), nexus_exp.plot, width = 7, height = 7)
curated_enhancer_boundaries.gr<-GRanges(context_chrom, IRanges(context_genomic_window[1], context_genomic_window[2]))
Generate contribution scores of mapped motifs.
curated_enhancer.df<-data.frame(
acc_contrib = standard_metapeak_matrix(regions.gr = curated_enhancer_boundaries.gr, sample.cov = 'shap/atac_wt_fold1_atac_counts.bw',
keep_region_coordinates = T) %>% t(),
seq = getSeq(bsgenome, curated_enhancer_boundaries.gr, as.character = T) %>% stringr::str_split_1(pattern = ''),
position = start(curated_enhancer_boundaries.gr):end(curated_enhancer_boundaries.gr)
)
# Generate sequence logo
acc_contrib.plot<-curated_enhancer.df %>%
tidyr::pivot_wider(names_from = seq, values_from = acc_contrib) %>%
dplyr::select(A, C, G, T) %>% as.matrix() %>% t() %>%
ggseqlogo(., method='custom', seq_type='dna') +
scale_y_continuous(name = 'Acc. contribution.')+
scale_x_discrete(name = 'Genomic position (chr10, bp)')+
ggtitle('CRISPR enhancer')
acc_contrib.plot
ggsave(paste0(figure_filepath, '/crispr_context_candidate_contrib.png'), acc_contrib.plot, height = 2, width = 20)
ggsave(paste0(figure_filepath, '/crispr_context_candidate_contrib.pdf'), acc_contrib.plot, height = 2, width = 20)
seqs.fa<-seqinr::read.fasta('fasta/crispr_context_seqs.fa')
filler<-mclapply(Sys.glob('shap/crispr/crispr_context_*_counts.h5'), function(y){
contrib.h5<-rhdf5::h5read(y, '/')
hyp_scores.list<-apply(contrib.h5$hyp_scores, 3, function(x){
as.data.frame(x) %>% transpose()
})
input_seqs.list<-apply(contrib.h5$input_seqs, 3, function(x){
df<-as.data.frame(x)
df<-dplyr::mutate_all(df, function(x) as.numeric(as.character(x)))
return(df %>% transpose)
})
contrib.list<-lapply(1:length(hyp_scores.list), function(x){
mat<-input_seqs.list[[x]] * hyp_scores.list[[x]]
colnames(mat)<-c('A','C','G','T')
return(mat %>% t())
})
names(contrib.list)<-names(seqs.fa)
#Visualize contribution scores
contrib.plot<- ggseqlogo(contrib.list, method='custom', seq_type='dna') +
scale_y_continuous(name = 'Contribution') +
scale_x_continuous(limits = c(800, 1100)) +
facet_wrap(~seq_group, ncol=1, scales='free_x') +
theme(axis.line = element_line(), axis.ticks = element_line())+
ggtitle(y)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_contribution_', basename(y), '.png'), contrib.plot, width = 16, height = 10)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_contribution_', basename(y), '.pdf'), contrib.plot, width = 16, height = 10)
}, mc.cores = 6)
Compare CRISPR ATAC-seq samples to make sure they’re replicable. So only compare replicates here.
atac_regions.gr<-rtracklayer::import(atac_regions.path) %>%
plyranges::mutate(across_Akr1cl = ifelse(overlapsAny(., GRanges(context_chrom, IRanges(start = context_genomic_window[1], end = context_genomic_window[2]))), 'Akr1cl','none'),
across_enhancer = ifelse(overlapsAny(., dev_enhancers.gr), 'dev_enh', across_Akr1cl))
#Collect coverage
crispr_context_cov.df<-mclapply(crispr_context_atac_reps.path, function(x){
regionSums(atac_regions.gr, x)
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov.df)<-crispr_context_atac_reps.path
#Consolidate values
crispr_context_cov_w_coord.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_context_cov.df) %>% arrange(desc(across_enhancer))
#Plot the sequencing depth imbalance.
plot.list<-mclapply(crispr_context_atac_reps.path %>% grep('_atac_1_cutsites.bw', ., value = T), function(x){
crispr_context_cov_w_coord.df$repa<-crispr_context_cov_w_coord.df[[x]]
crispr_context_cov_w_coord.df$repb<-crispr_context_cov_w_coord.df[[gsub('_1_', '_2_', x)]]
plot<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repa), y = log(repb)))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
ggtitle(basename(x) %>% gsub('_atac_1_cutsites.bw', '', .))+
theme_classic() + theme(legend.position = 'none')
return(plot)
}, mc.cores = 3)
rep.plot<-wrap_plots(plot.list) + plot_layout(nrow = 1, ncol = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_rep_comparison.png'), rep.plot, height = 3, width = 9)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_rep_comparison.pdf'), rep.plot, height = 3, width = 9)
Compare CRISPR ChIP-nexus replicates.
sox2_regions.gr<-rtracklayer::import(sox2_regions.path) %>%
plyranges::mutate(across_Akr1cl = ifelse(overlapsAny(., GRanges(context_chrom, IRanges(start = context_genomic_window[1], end = context_genomic_window[2]))), 'Akr1cl','none'),
across_enhancer = ifelse(overlapsAny(., dev_enhancers.gr), 'dev_enh', across_Akr1cl))
#Collect coverage
crispr_context_cov.df<-mclapply(crispr_context_nexus_reps.path, function(x){
regionSums(sox2_regions.gr, x) + abs(regionSums(sox2_regions.gr, gsub('positive','negative',x)))
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov.df)<-crispr_context_nexus_reps.path
#Consolidate values
crispr_context_cov_w_coord.df<-cbind(sox2_regions.gr %>% as.data.frame, crispr_context_cov.df) %>% arrange(desc(across_enhancer))
#Plot the sequencing depth imbalance.
plot.list<-mclapply(crispr_context_nexus_reps.path %>% grep('_sox2_nexus_1_positive.bw', ., value = T), function(x){
crispr_context_cov_w_coord.df$repa<-crispr_context_cov_w_coord.df[[x]]
crispr_context_cov_w_coord.df$repb<-crispr_context_cov_w_coord.df[[gsub('_1_', '_2_', x)]]
crispr_context_cov_w_coord.df$repc<-crispr_context_cov_w_coord.df[[gsub('_1_', '_3_', x)]]
plotab<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repa), y = log(repb)))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
ggtitle(basename(x) %>% gsub('_sox2_1_cutsites.bw', '', .))+
theme_classic() + theme(legend.position = 'none')
plotac<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repa), y = log(repc)))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
scale_y_continuous(name = paste0('rep 3 log-reads across peaks'))+
ggtitle(basename(x) %>% gsub('_sox2_1_cutsites.bw', '', .))+
theme_classic() + theme(legend.position = 'none')
plotbc<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repb), y = log(repc)))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0('rep 2 log-reads across peaks'))+
scale_y_continuous(name = paste0('rep 3 log-reads across peaks'))+
ggtitle(basename(x) %>% gsub('_sox2_1_cutsites.bw', '', .))+
theme_classic() + theme(legend.position = 'none')
return(plotab + plotac + plotbc)
}, mc.cores = 3)
rep.plot<-wrap_plots(plot.list) + plot_layout(nrow = 2, ncol = 1)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_rep_comparison.png'), rep.plot, height = 6, width = 9)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_rep_comparison.pdf'), rep.plot, height = 6, width = 9)
plotwt<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(`bw/mesc_R1_sox2_nexus_15_positive.bw`), y = log(`bw/mesc_R1_sox2_nexus_16_positive.bw`)))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
ggtitle('WT')+
theme_classic() + theme(legend.position = 'none')
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_wt_rep_comparison.png'), plotwt, height = 3, width = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_wt_rep_comparison.pdf'), plotwt, height = 3, width = 3)
Do our conditions and CRISPR mutants look consistent across regions of interest that are not the CRISPR locus?
filler<-lapply(1:length(dev_enhancers.gr), function(x){
metapeak.df<-mclapply(crispr_context_atac_combined.path, function(y){
standard_metapeak(resize(dev_enhancers.gr[x], 1, 'center'), y, downstream = 501, upstream = 500) %>%
dplyr::mutate(path = y, enhancer = dev_enhancers.gr[x]$name)
}, mc.cores = 8) %>% rbindlist() %>%
dplyr::mutate(scenario_state = basename(path) %>% gsub('mesc_Akr1cl_scenario_', '', .) %>% gsub('mesc_Btbd11_scenario_', '', .) %>% gsub('_atac_combined_normalized.bw', '', .),
genomic_position = tss_distance + start(resize(dev_enhancers.gr[x], 1, 'center'))) %>%
tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = 'context_|coop_', remove = F) %>%
dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B')))
enhancer_name<-dev_enhancers.gr[x]$name
plot<-ggplot(metapeak.df, aes(x = genomic_position, y = reads, color = state))+
geom_line()+
scale_x_continuous(name = seqnames(dev_enhancers.gr[x])) +
scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments', limits = c(0, 5))+
scale_colour_manual(values = turbo(4, begin = .2, end = .8))+
ggtitle(enhancer_name)+
theme_classic()
ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_comparison.png'), plot, height = 4, width = 4)
ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_comparison.pdf'), plot, height = 4, width = 4)
return(NULL)
})
Plot ChIP-nexus replicates as well
filler<-lapply(1:length(dev_enhancers.gr), function(x){
metapeak.df<-mclapply(crispr_context_nexus_combined.path, function(y){
exo_metapeak(resize(dev_enhancers.gr[x], 1, 'center'), list(pos = y, neg = gsub('positive','negative', y)), downstream = 501, upstream = 500) %>%
dplyr::mutate(path = y, enhancer = dev_enhancers.gr[x]$name)
}, mc.cores = 8) %>% rbindlist() %>%
dplyr::mutate(scenario_state = basename(path) %>% gsub('mesc_Akr1cl_scenario_', '', .) %>% gsub('mesc_R1', 'context_WT', .) %>% gsub('_sox2_nexus_combined_normalized_positive.bw', '', .),
genomic_position = tss_distance + start(resize(dev_enhancers.gr[x], 1, 'center'))) %>%
tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = 'context_|coop_', remove = F) %>%
dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B')))
# metapeak.df$reads[metapeak.df$strand=='-']<-metapeak.df$reads[metapeak.df$strand=='-']*-1
enhancer_name<-dev_enhancers.gr[x]$name
plot<-ggplot(metapeak.df, aes(x = genomic_position, y = reads, color = state, group = interaction(strand, state)))+
geom_line()+
scale_x_continuous(name = seqnames(dev_enhancers.gr[x])) +
scale_y_continuous(name = 'RPM-normalized sox2-seq fragments')+
scale_colour_manual(values = turbo(4, begin = .2, end = .8))+
ggtitle(enhancer_name)+
theme_classic()
ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_nexus_comparison.png'), plot, height = 4, width = 4)
ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_nexus_comparison.pdf'), plot, height = 4, width = 4)
return(NULL)
})
Run DESeq2 in order to measure whether CRISPR scenarios are significantly differentially enriched when we do these mutations. Since we are comparing a few features, we will contrast a couple assessments first.
#Collect coverage
crispr_context_cov.df<-mclapply(crispr_context_atac_reps.path, function(x){
regionSums(atac_regions.gr, x)
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov.df)<-crispr_context_atac_reps.path
#Format organization
atac_labels.df<-data.frame(filename = colnames(crispr_context_cov.df)) %>%
dplyr::mutate(state = basename(crispr_context_atac_reps.path) %>% gsub('mesc_Akr1cl_scenario_context_', '', .) %>% gsub('mesc_Btbd11_scenario_WT_coop_', '', .) %>% gsub('_cutsites.bw', '', .)) %>%
# tidyr::separate(state, into = c('timepoint','replicate'), remove = F)
tidyr::separate(col = 'state', into = c('state','rep'), sep = '_atac_', remove = T) %>%
dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B','null')))
rownames(atac_labels.df)<-crispr_context_atac_reps.path
atac_labels.df
## filename
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_1_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_A_atac_1_cutsites.bw
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_2_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_A_atac_2_cutsites.bw
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_1_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_B_atac_1_cutsites.bw
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_2_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_B_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw
## state rep
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_1_cutsites.bw A 1
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_2_cutsites.bw A 2
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_1_cutsites.bw B 1
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_2_cutsites.bw B 2
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw WT 1
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw WT 2
#Consolidate counts
atac.df<-crispr_context_cov.df
colnames(atac.df)<-crispr_context_atac_reps.path
#Run DESeq2
dds <- DESeqDataSetFromMatrix(countData = atac.df,
colData = atac_labels.df,
design = ~ state)
dds <- DESeq(dds)
Show enrichment results.
deseq_comparisons.df<-lapply(c('A','B'), function(y){
res <- results(dds, contrast = c('state', 'WT', y)) %>%
cbind(., atac_regions.gr %>% as.data.frame()) %>% as.data.frame %>%
dplyr::mutate(signif = cut(padj, breaks = c(0, 0.001, 0.01, 0.05, 1), include.lowest = T, labels = c('***','**','*',''))) %>%
dplyr::filter(across_enhancer == 'Akr1cl') %>%
dplyr::mutate(state = y, comparison = paste0('Akr1cl enhancer: WT vs ', y))
}) %>% rbindlist()
deseq_comparisons.df
## baseMean log2FoldChange lfcSE stat pvalue padj
## <num> <num> <num> <num> <num> <num>
## 1: 931.3056 1.6677730 0.09883465 16.874377 6.945437e-64 4.114899e-61
## 2: 931.3056 0.2282224 0.08967963 2.544863 1.093206e-02 2.882342e-02
## seqnames start end width strand name score signalValue pValue
## <fctr> <int> <int> <int> <fctr> <char> <num> <num> <num>
## 1: chr1 65037228 65038227 1000 * <NA> 1000 8.6952 385.814
## 2: chr1 65037228 65038227 1000 * <NA> 1000 8.6952 385.814
## qValue peak across_Akr1cl across_enhancer signif state
## <num> <int> <char> <char> <fctr> <char>
## 1: 382.556 500 Akr1cl Akr1cl *** A
## 2: 382.556 500 Akr1cl Akr1cl * B
## comparison
## <char>
## 1: Akr1cl enhancer: WT vs A
## 2: Akr1cl enhancer: WT vs B
Visualize comparison plots with statistics attached:
#Collect coverage
crispr_context_cov_combined.df<-mclapply(crispr_context_atac_combined.path, function(x){
log(regionSums(atac_regions.gr, x))
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov_combined.df)<-crispr_context_atac_combined.path
#Consolidate values
crispr_context_cov_w_coord_combined.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_context_cov_combined.df) %>% arrange(desc(across_enhancer))
#Plot the sequencing depth imbalance.
compare.plot.list<-lapply(c('A','B'), function(y){
wt_sample<-paste0('bw/mesc_Btbd11_scenario_WT_coop_WT_atac_combined_normalized.bw')
mut_sample<-paste0('bw/mesc_Akr1cl_scenario_context_mut_', y, '_atac_combined_normalized.bw')
deseq.df<-deseq_comparisons.df %>% dplyr::filter(state ==y) %>%
data.frame(label = paste0('p=', .$padj, ', ', .$signif))
plot<-ggplot(data = crispr_context_cov_w_coord_combined.df, aes(x = .data[[wt_sample]], y = .data[[mut_sample]]))+
geom_abline(slope = 1, intercept = 0)+
ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
# geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
geom_text(data = deseq.df, aes(x = 8, y = 4, label = label))+
ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
scale_color_manual(values = c( 'red', 'navy', 'gray'))+
scale_x_continuous(name = paste0(' log-reads across peaks'))+
scale_y_continuous(name = paste0(' log-reads across peaks'))+
ggtitle(paste0('Scenario context WT vs ', y, ' comparisons'))+
theme_classic() + theme(legend.position = 'none')
return(plot)
})
compare.plot<-wrap_plots(compare.plot.list) + plot_layout(nrow = 1, ncol = 2)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_diff_comparison.png'), compare.plot, height = 3, width = 6)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_diff_comparison.pdf'), compare.plot, height = 3, width = 6)
For the purposes of reproducibility, the R/Bioconductor session information is printed below:
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 8.9 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/Chicago
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] DESeq2_1.44.0
## [2] SummarizedExperiment_1.34.0
## [3] MatrixGenerics_1.16.0
## [4] matrixStats_1.5.0
## [5] rhdf5_2.48.0
## [6] ggseqlogo_0.2
## [7] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
## [8] GenomicFeatures_1.56.0
## [9] AnnotationDbi_1.66.0
## [10] Biobase_2.64.0
## [11] BSgenome.Mmusculus.UCSC.mm10_1.4.3
## [12] BSgenome_1.72.0
## [13] BiocIO_1.14.0
## [14] viridis_0.6.5
## [15] viridisLite_0.4.2
## [16] digest_0.6.37
## [17] pander_0.6.6
## [18] lattice_0.22-6
## [19] testit_0.13
## [20] readr_2.1.5
## [21] patchwork_1.3.0
## [22] data.table_1.17.0
## [23] dplyr_1.1.4
## [24] Rsamtools_2.20.0
## [25] plyranges_1.24.0
## [26] reshape2_1.4.4
## [27] ggplot2_3.5.2
## [28] Biostrings_2.72.1
## [29] XVector_0.44.0
## [30] magrittr_2.0.3
## [31] rtracklayer_1.64.0
## [32] GenomicRanges_1.56.2
## [33] GenomeInfoDb_1.40.1
## [34] IRanges_2.38.1
## [35] S4Vectors_0.42.1
## [36] BiocGenerics_0.50.0
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.17.1 jsonlite_1.8.9 ggbeeswarm_0.7.2
## [4] farver_2.1.2 rmarkdown_2.29 zlibbioc_1.50.0
## [7] ragg_1.3.3 vctrs_0.6.5 Cairo_1.6-2
## [10] memoise_2.0.1 RCurl_1.98-1.16 rstatix_0.7.2
## [13] htmltools_0.5.8.1 S4Arrays_1.4.1 curl_6.2.1
## [16] broom_1.0.7 cellranger_1.1.0 Rhdf5lib_1.26.0
## [19] Formula_1.2-5 SparseArray_1.4.8 sass_0.4.9
## [22] bslib_0.8.0 plyr_1.8.9 cachem_1.1.0
## [25] GenomicAlignments_1.40.0 lifecycle_1.0.4 pkgconfig_2.0.3
## [28] Matrix_1.7-0 R6_2.5.1 fastmap_1.2.0
## [31] GenomeInfoDbData_1.2.12 colorspace_2.1-1 textshaping_1.0.0
## [34] RSQLite_2.3.9 ggpubr_0.6.0 labeling_0.4.3
## [37] httr_1.4.7 abind_1.4-8 mgcv_1.9-1
## [40] compiler_4.4.1 bit64_4.5.2 withr_3.0.2
## [43] backports_1.5.0 BiocParallel_1.38.0 carData_3.0-5
## [46] DBI_1.2.3 ggsignif_0.6.4 MASS_7.3-60.2
## [49] DelayedArray_0.30.1 rjson_0.2.23 tools_4.4.1
## [52] vipor_0.4.7 beeswarm_0.4.0 glue_1.8.0
## [55] restfulr_0.0.15 nlme_3.1-164 rhdf5filters_1.16.0
## [58] grid_4.4.1 ade4_1.7-23 generics_0.1.3
## [61] seqinr_4.2-36 gtable_0.3.6 tzdb_0.4.0
## [64] tidyr_1.3.1 hms_1.1.3 car_3.1-3
## [67] utf8_1.2.4 pillar_1.10.2 stringr_1.5.1
## [70] vroom_1.6.5 splines_4.4.1 bit_4.6.0
## [73] tidyselect_1.2.1 locfit_1.5-9.10 knitr_1.50
## [76] gridExtra_2.3 xfun_0.51 stringi_1.8.4
## [79] UCSC.utils_1.0.0 yaml_2.3.10 evaluate_1.0.3
## [82] codetools_0.2-20 tibble_3.2.1 cli_3.6.3
## [85] systemfonts_1.2.3 munsell_0.5.1 jquerylib_0.1.4
## [88] Rcpp_1.0.14 readxl_1.4.5 png_0.1-8
## [91] XML_3.99-0.18 ggrastr_1.0.2 blob_1.2.4
## [94] bitops_1.0-9 scales_1.3.0 purrr_1.0.2
## [97] crayon_1.5.3 rlang_1.1.4 KEGGREST_1.44.1